This is an IPF slide that was run on Xenium week of 10/27/23 and the core pre-designed library for some reason didn’t populate, only the 100 custom genes did.
library(Seurat)
Loading required package: SeuratObject
Loading required package: sp
Attaching package: ‘SeuratObject’
The following object is masked from ‘package:base’:
intersect
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(tidyverse)
-- Attaching core tidyverse packages -------------------------------------------------------------- tidyverse 2.0.0 --
v dplyr 1.1.3 v readr 2.1.4
v forcats 1.0.0 v stringr 1.5.0
v ggplot2 3.4.4 v tibble 3.2.1
v lubridate 1.9.3 v tidyr 1.3.0
v purrr 1.0.2
-- Conflicts -------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
ipf1.xen <- LoadXenium('output-XETG00143__0010972__lung__20231025__000223/')
10X data contains more than one type and is being returned as a list containing matrices of each type.
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
Bceause of the core library fail, there are only 100 genes with anything close to meaningful data.
genes_to_use <- read.csv('limited_panel.csv')$Gene
DefaultAssay(ipf1.xen) <- 'Xenium'
ipf1.xen <- NormalizeData(ipf1.xen)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
ipf1.xen <- FindVariableFeatures(ipf1.xen)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
ipf1.xen <- ScaleData(ipf1.xen, features=genes_to_use)
Centering and scaling data matrix
|
| | 0%
|
|============================================================================================================| 100%
ipf1.xen <- RunPCA(ipf1.xen, npcs = 30, features = genes_to_use)
PC_ 1
Positive: ITGB6, ABCA3, KRT8, ALOX15B, PARP14, XBP1, AGR2, CSF3R, LGALS3, CEACAM6
VEGFA, GDF15, ATF4, ERN1, PRDX6, S100A9, PDE4D, FOXA2, CLDN4, IL13RA1
TGFB1, IFI6, IRF1, SMAD7, DUSP1, IL4R, RAF1, IRF3, DDIT3, SMAD3
Negative: COL1A1, CRISPLD2, INMT, SCN7A, POSTN, CTHRC1, TFF2, IL17A, CST4, PTGDR2
IL4, CLC, RNASE3, CLCA1, SIGLEC8, CST1, IL13, IFNG, CCR6, VCAM1
SERPINB2, IFNB1, CXCR3, PI16, IFNA1, MUC5AC, NOS2, MPO, TSLP, CCR3
PC_ 2
Positive: ITGB6, ALOX15B, ABCA3, AGR2, CEACAM6, GDF15, FOXA2, CLDN4, KRT8, CSF3R
XBP1, ALOX15, PDE4D, IL5, ERN2, FOXA3, ERN1, MMP1, CCR4, RORC
TFF3, WNT5A, MUC16, IRF4, SPRY2, CCR3, MUC5AC, ARG1, IFNB1, SERPINB2
Negative: DUSP1, FKBP5, KLF4, ITGA1, SMAD7, TGFB1, INMT, CRISPLD2, SMAD6, IL4R
TNFAIP3, COL1A1, SCN7A, ARRDC2, IL33, IRF1, IFI6, OAS1, FOXN3, IL13RA1
ATF4, EGLN3, POSTN, S100A8, IFITM1, FUT4, NOS3, LGALS3, GATA3, ITGA6
PC_ 3
Positive: COL1A1, CRISPLD2, SCN7A, INMT, PDE4D, ITGA1, CTHRC1, POSTN, RORA, VEGFA
SPRY1, IFITM1, SPRY2, WNT5A, ITGB6, GDF15, ATF4, IL33, ABCA3, VCAM1
CLDN4, ARNT, AGR2, FOXA2, ALOX15B, FOXN3, PRDX6, ERN1, EGLN3, IRF3
Negative: S100A9, S100A8, OAS1, IL17RB, KLF4, IRF1, IFI6, LGALS3, CSF3R, RORC
TNF, ARRDC2, SMAD3, TNFAIP3, PARP14, FUT4, KLF7, IL5, TGFB1, DUSP1
SMAD7, CX3CR1, CDKN2A, GATA3, DDIT3, MMP1, TBX21, KRT8, ARG1, IFNA1
PC_ 4
Positive: SMAD6, ITGA6, IL33, NOS3, EGLN3, IFITM1, DUSP1, SMAD7, ARRDC2, GATA3
IL4R, KLF4, ABCA3, AGR2, CLDN4, ITGA1, ITGB6, CEACAM6, GDF15, FOXA2
IL13RA1, IL6, CX3CR1, ALOX15B, ALOX15, PI16, PARP14, IRF1, TBX21, ERN2
Negative: SCN7A, COL1A1, LGALS3, S100A9, S100A8, CTHRC1, CRISPLD2, SPRY1, IL17RB, FUT4
DDIT3, FKBP5, SMAD3, PRDX6, CSF3R, ARNT, FOXN3, RORA, WNT5A, INMT
RORC, TGFB1, OAS1, CST1, RAF1, IL5, TNF, IRF3, CST4, XBP1
PC_ 5
Positive: TBX21, CCR4, CX3CR1, INMT, ERN1, CSF3R, GATA3, IRF1, RORA, IRF4
ABCA3, ITGA1, XBP1, TGFB1, ALOX15B, FKBP5, IFITM1, SCN7A, IRF3, PDE4D
SPRY2, GDF15, ATF4, IL4R, TNFAIP3, FOXN3, PI16, SLC7A6, SMAD6, ITGB6
Negative: POSTN, CTHRC1, SMAD3, EGLN3, RORC, SPRY1, WNT5A, CEACAM6, IL5, NOS3
IL33, MMP1, CST1, KRT8, IL17RB, ALOX15, TFF3, LGALS3, ERN2, VCAM1
CLDN4, KLF7, CST4, FOXA2, FOXA3, DUSP1, ARNT, VEGFA, MUC16, ARG1
ipf1.xen <- RunUMAP(ipf1.xen, dims = 1:30)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
13:18:35 UMAP embedding parameters a = 0.9922 b = 1.112
13:18:35 Read 532668 rows and found 30 numeric columns
13:18:35 Using Annoy for neighbor search, n_neighbors = 30
13:18:35 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:20:43 Writing NN index file to temp file C:\Users\vauyeung\AppData\Local\Temp\RtmpsFHg92\file263879372dfc
13:20:46 Searching Annoy index using 1 thread, search_k = 3000
13:25:53 Annoy recall = 86.96%
13:25:56 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
13:26:11 77462 smooth knn distance failures
13:26:41 Initializing from normalized Laplacian + noise (using irlba)
13:43:07 Commencing optimization for 200 epochs, with 22519242 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:03:10 Optimization finished
ipf1.xen <- FindNeighbors(ipf1.xen, reduction = "pca", dims = 1:30)
Computing nearest neighbor graph
Computing SNN
FeaturePlot(ipf1.xen, features = c("nFeature_Xenium", "nCount_Xenium", "nCount_ControlProbe", "nFeature_ControlProbe", 'AGR2','ITGB6'), min.cutoff='q25', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Warning in FeaturePlot(ipf1.xen, features = c("nFeature_Xenium", "nCount_Xenium", :
All cells have the same value (1) of nCount_ControlProbe.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Warning in FeaturePlot(ipf1.xen, features = c("nFeature_Xenium", "nCount_Xenium", :
All cells have the same value (1) of nFeature_ControlProbe.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
FeaturePlot(ipf1.xen, features = c("PI16", "ABCA3", "XBP1", "TNFAIP3", "WNT5A", "IFITM1"), min.cutoff='q25', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
FeaturePlot(ipf1.xen, features = c("COL1A1", "CTHRC1", "INMT", "SCN7A", "KRT8", "CLDN4"), min.cutoff='q25', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Clearly one issue here is that marker+ cells are smeared across multiple regions of the UMAP. Probably because we are trying too hard to separate with too few genes, especially since most of the 100 gene panel is minimally informative. Two strategies to consider, 1, reducing number of vectors used, or 2, forcing Seurat to use the favorite genes. Favor 1 if it works, in case there is some hidden informative marker we were not aware of.
ElbowPlot(ipf1.xen)
I mean yeah, maybe 10, but honestly probably 5.
DimHeatmap(ipf1.xen, dims = 1:12, cells = 1000, balanced = TRUE)
All our favorite genes show up by PC6. Kind of curious about these interferon genes including OAS1.
FeaturePlot(ipf1.xen, features = c("OAS1", "IRF1", "VEGFA", "ITGA1", "FKBP5", "FOXA2"), min.cutoff='q25', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
ipf1.xen <- RunUMAP(ipf1.xen, dims = 1:5)
14:09:58 UMAP embedding parameters a = 0.9922 b = 1.112
14:09:58 Read 532668 rows and found 5 numeric columns
14:09:58 Using Annoy for neighbor search, n_neighbors = 30
14:09:58 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:11:24 Writing NN index file to temp file C:\Users\vauyeung\AppData\Local\Temp\RtmpsFHg92\file26387d63f0
14:11:26 Searching Annoy index using 1 thread, search_k = 3000
14:16:27 Annoy recall = 89.29%
14:16:30 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
14:16:48 85882 smooth knn distance failures
14:17:15 Found 4 connected components, falling back to 'spca' initialization with init_sdev = 1
14:17:15 Using 'irlba' for PCA
14:17:15 PCA: 2 components explained 62.56% variance
14:17:15 Scaling init to sdev = 1
14:17:18 Commencing optimization for 200 epochs, with 18654360 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:41:12 Optimization finished
FeaturePlot(ipf1.xen, features = c("nFeature_Xenium", "nCount_Xenium", "nCount_ControlProbe", "nFeature_ControlProbe", 'AGR2','ITGB6'), min.cutoff='q25', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Warning in FeaturePlot(ipf1.xen, features = c("nFeature_Xenium", "nCount_Xenium", :
All cells have the same value (1) of nCount_ControlProbe.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Warning in FeaturePlot(ipf1.xen, features = c("nFeature_Xenium", "nCount_Xenium", :
All cells have the same value (1) of nFeature_ControlProbe.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
FeaturePlot(ipf1.xen, features = c("COL1A1", "CTHRC1", "INMT", "SCN7A", "KRT8", "CLDN4"), min.cutoff='q25', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Annoyingly what this actually illustrates is that a lot of noise is driven by low feature cells, which is entirely sensible.
save(ipf1.xen, file='ipf1.xen.RObj')
On the theory that if there was only one feature, but it had 10 counts, that is probably still interpretable.
ipf1.filtered.xen <- subset(ipf1.xen, subset=nCount_Xenium>10)
Warning: Not validating FOV objects
Warning: Not validating Centroids objects
Warning: Not validating Centroids objects
Warning: Not validating FOV objects
Warning: Not validating Centroids objects
Warning: Not validating FOV objects
Warning: Not validating FOV objects
Warning: Not validating FOV objects
FeaturePlot(ipf1.filtered.xen, features = c("nFeature_Xenium", "nCount_Xenium", "nCount_ControlProbe", "nFeature_ControlProbe", 'AGR2','ITGB6'), min.cutoff='q25', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Warning in FeaturePlot(ipf1.filtered.xen, features = c("nFeature_Xenium", :
All cells have the same value (1) of nCount_ControlProbe.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Warning in FeaturePlot(ipf1.filtered.xen, features = c("nFeature_Xenium", :
All cells have the same value (1) of nFeature_ControlProbe.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
FeaturePlot(ipf1.filtered.xen, features = c("COL1A1", "CTHRC1", "INMT", "SCN7A", "KRT8", "CLDN4"), min.cutoff='q25', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
ipf1.filtered.xen <- NormalizeData(ipf1.filtered.xen)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
ipf1.filtered.xen <- FindVariableFeatures(ipf1.filtered.xen)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
ipf1.filtered.xen <- ScaleData(ipf1.filtered.xen, features=genes_to_use)
Centering and scaling data matrix
|
| | 0%
|
|============================================================================================================| 100%
ipf1.filtered.xen <- RunPCA(ipf1.filtered.xen, npcs = 6, features = genes_to_use)
PC_ 1
Positive: ITGB6, ABCA3, ALOX15B, KRT8, AGR2, CEACAM6, CSF3R, XBP1, GDF15, PARP14
FOXA2, CLDN4, ERN1, S100A9, LGALS3, VEGFA, PDE4D, PRDX6, ATF4, SMAD3
IFI6, SLC7A6, DDIT3, IRF3, IRF1, IL13RA1, IL17RB, RAF1, RORC, S100A8
Negative: COL1A1, CRISPLD2, INMT, SCN7A, ITGA1, POSTN, CTHRC1, IL33, EGLN3, VCAM1
SMAD6, NOS3, CST4, PI16, CST1, IL6, GATA3, FKBP5, SIGLEC8, TSLP
TFF2, PTGDR2, IL17A, NOS2, CCR6, RNASE3, IL4, CLC, TBX21, CLCA1
PC_ 2
Positive: S100A8, KLF4, S100A9, DUSP1, OAS1, IRF1, IFI6, SMAD7, TGFB1, ARRDC2
FKBP5, LGALS3, IL4R, TNFAIP3, PARP14, IL13RA1, IL17RB, SMAD6, FUT4, KLF7
RAF1, ATF4, FOXN3, GATA3, IL33, ITGA6, TNF, NOS3, EGLN3, DDIT3
Negative: ITGB6, COL1A1, ABCA3, ALOX15B, AGR2, CEACAM6, PDE4D, GDF15, FOXA2, CLDN4
SCN7A, CRISPLD2, CTHRC1, KRT8, WNT5A, VEGFA, SPRY2, POSTN, INMT, CST1
CCR4, ALOX15, SPRY1, ERN2, RORA, CST4, XBP1, FOXA3, TFF3, ERN1
PC_ 3
Positive: SMAD6, IL33, ITGA6, IFITM1, ITGA1, NOS3, EGLN3, DUSP1, SMAD7, IL4R
PDE4D, INMT, VEGFA, ATF4, ARRDC2, IL13RA1, GDF15, GATA3, CRISPLD2, CLDN4
RORA, ERN1, IRF3, ABCA3, SPRY2, FOXA2, ITGB6, VCAM1, PARP14, FOXN3
Negative: S100A9, S100A8, LGALS3, SMAD3, IL17RB, RORC, OAS1, IL5, TNF, CSF3R
FUT4, CTHRC1, CST1, MMP1, KRT8, SPRY1, DDIT3, ARG1, CDKN2A, CCR3
FOXA3, IFI6, MPO, CXCR3, CST4, MUC5AC, IFNA1, IFNB1, SERPINB2, IFNG
PC_ 4
Positive: SCN7A, COL1A1, CRISPLD2, FKBP5, INMT, RORA, FOXN3, PRDX6, ARNT, PDE4D
DDIT3, IRF3, LGALS3, SPRY1, RAF1, ATF4, TGFB1, FUT4, ERN1, CTHRC1
SLC7A6, SPRY2, CSF3R, VEGFA, ITGA1, TNFAIP3, IL13RA1, WNT5A, XBP1, VCAM1
Negative: SMAD6, IL33, NOS3, EGLN3, ITGA6, CEACAM6, DUSP1, SMAD3, IFITM1, GATA3
RORC, KRT8, ARRDC2, ALOX15, MMP1, SMAD7, CX3CR1, IL5, TFF3, KLF4
AGR2, ERN2, TBX21, ABCA3, ITGB6, MUC16, POSTN, FOXA3, MUC5AC, CCR3
PC_ 5
Positive: ALOX15, ERN2, TFF3, CLDN4, SPRY1, EGLN3, MMP1, LGALS3, IL33, RORC
MUC16, FOXA2, SMAD3, FOXN3, CEACAM6, VEGFA, AGR2, VCAM1, WNT5A, RAF1
ARNT, NOS3, IRF3, PRDX6, SLC7A6, DUSP1, IL17RB, SCN7A, KRT8, KLF7
Negative: ALOX15B, ABCA3, CSF3R, TBX21, CX3CR1, TGFB1, CCR4, ITGA1, ERN1, SMAD6
ITGB6, GATA3, S100A9, IRF4, INMT, FKBP5, GDF15, S100A8, IRF1, ITGA6
COL1A1, ARRDC2, XBP1, SMAD7, NOS2, ATF4, TNF, CCR6, FOXA3, KLF4
Warning: Number of dimensions changing from 30 to 6
ipf1.filtered.xen <- RunUMAP(ipf1.filtered.xen, dims = 1:5)
14:47:29 UMAP embedding parameters a = 0.9922 b = 1.112
14:47:29 Read 290139 rows and found 5 numeric columns
14:47:29 Using Annoy for neighbor search, n_neighbors = 30
14:47:29 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:48:13 Writing NN index file to temp file C:\Users\vauyeung\AppData\Local\Temp\RtmpsFHg92\file263891dedc
14:48:14 Searching Annoy index using 1 thread, search_k = 3000
14:50:57 Annoy recall = 100%
14:51:01 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
14:51:12 72 smooth knn distance failures
14:51:27 Initializing from normalized Laplacian + noise (using irlba)
14:51:54 Commencing optimization for 200 epochs, with 9657008 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:58:35 Optimization finished
FeaturePlot(ipf1.filtered.xen, features = c("nFeature_Xenium", "nCount_Xenium", "nCount_ControlProbe", "nFeature_ControlProbe"), min.cutoff='q1', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Warning in FeaturePlot(ipf1.filtered.xen, features = c("nFeature_Xenium", :
All cells have the same value (1) of nCount_ControlProbe.
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Warning in FeaturePlot(ipf1.filtered.xen, features = c("nFeature_Xenium", :
All cells have the same value (1) of nFeature_ControlProbe.
FeaturePlot(ipf1.filtered.xen, features = c("COL1A1", "CTHRC1", "INMT", "SCN7A", "TNFAIP3", "IFITM1"), min.cutoff='q1', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
FeaturePlot(ipf1.filtered.xen, features = c("AGR2", "ITGB6", "XBP1", "ABCA3", "KRT8", "CLDN4"), min.cutoff='q1', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
FeaturePlot(ipf1.filtered.xen, features = c("OAS1", "IRF1", "VEGFA", "GDF15", "FKBP5", "FOXA2"), min.cutoff='q1', max.cutoff='q90')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
# modification to reduce cluster number. i think the sparsity of data means that pruned edges leads to more communities.
ipf1.filtered.xen <- FindNeighbors(ipf1.filtered.xen, reduction='pca', dims=1:5, prune.SNN=0)
Computing nearest neighbor graph
Computing SNN
ipf1.filtered.xen <- FindClusters(ipf1.filtered.xen, resolution = 0.4)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 290139
Number of edges: 22673759
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8658
Number of communities: 11
Elapsed time: 445 seconds
DimPlot(ipf1.filtered.xen, group.by = 'seurat_clusters')
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
key_markers <- c("COL1A1", "CTHRC1", "INMT", "SCN7A","PI16", "IFITM1","TNFAIP3", "WNT5A",'ITGB6','LGALS3',"KRT8", "CLDN4", "ABCA3",'GDF15', "XBP1",'OAS1')
DotPlot(ipf1.filtered.xen, features=key_markers) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
I can’t tell if 0, 1 or 5 are more inflammatory, among fibroblasts.
VlnPlot(subset(ipf1.filtered.xen, idents=c(0,1,5)), features=c('COL1A1','IFITM1', 'TNFAIP3','IRF1','OAS1'))
Warning: Not validating FOV objects
Warning: Not validating Centroids objects
Warning: Not validating Centroids objects
Warning: Not validating FOV objects
Warning: Not validating Centroids objects
Warning: Not validating FOV objects
Warning: Not validating FOV objects
Warning: Not validating FOV objects
I think 5 is a bit more… Therefore rename as follows
ipf1.filtered.xen <- RenameIdents(object=ipf1.filtered.xen,
`0`='Unidentified',
`1`='Fib:Alveolar',
`2`='Fib:Fibrotic',
`3`='DATP:NOS',
`4`='Macrophage?',
`5`='Fib:Alv/Inflammed',
`6`='DATP:Fibrotic',
`7`='Airway-like',
`8`='Fib:NOS',
`9`='Fib:NOS',
`10`='Fib:NOS')
ipf1.filtered.xen$named_clusters <- Idents(ipf1.filtered.xen)
DimPlot(ipf1.filtered.xen, group.by = 'named_clusters', label=T)
Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`
Marker discovery for these groups. Just in case there is information hiding in the asthma genes.
all.markers <- FindAllMarkers(ipf1.filtered.xen, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
Calculating cluster Unidentified
| | 0 % ~calculating
|++++ | 8 % ~18s
|++++++++ | 15% ~18s
|++++++++++++ | 23% ~16s
|++++++++++++++++ | 31% ~15s
|++++++++++++++++++++ | 38% ~13s
|++++++++++++++++++++++++ | 46% ~11s
|+++++++++++++++++++++++++++ | 54% ~09s
|+++++++++++++++++++++++++++++++ | 62% ~08s
|+++++++++++++++++++++++++++++++++++ | 69% ~06s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~05s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~03s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=20s
Calculating cluster Fib:Alveolar
| | 0 % ~calculating
|+++++ | 9 % ~16s
|++++++++++ | 18% ~19s
|++++++++++++++ | 27% ~15s
|+++++++++++++++++++ | 36% ~13s
|+++++++++++++++++++++++ | 45% ~11s
|++++++++++++++++++++++++++++ | 55% ~09s
|++++++++++++++++++++++++++++++++ | 64% ~07s
|+++++++++++++++++++++++++++++++++++++ | 73% ~05s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=18s
Calculating cluster Fib:Fibrotic
| | 0 % ~calculating
|+++++++++ | 17% ~09s
|+++++++++++++++++ | 33% ~07s
|+++++++++++++++++++++++++ | 50% ~05s
|++++++++++++++++++++++++++++++++++ | 67% ~03s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09s
Calculating cluster DATP:NOS
| | 0 % ~calculating
|+++++ | 9 % ~15s
|++++++++++ | 18% ~13s
|++++++++++++++ | 27% ~12s
|+++++++++++++++++++ | 36% ~10s
|+++++++++++++++++++++++ | 45% ~09s
|++++++++++++++++++++++++++++ | 55% ~07s
|++++++++++++++++++++++++++++++++ | 64% ~07s
|+++++++++++++++++++++++++++++++++++++ | 73% ~05s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=18s
Calculating cluster Macrophage?
| | 0 % ~calculating
|+++ | 6 % ~24s
|++++++ | 11% ~23s
|+++++++++ | 17% ~23s
|++++++++++++ | 22% ~21s
|++++++++++++++ | 28% ~20s
|+++++++++++++++++ | 33% ~18s
|++++++++++++++++++++ | 39% ~17s
|+++++++++++++++++++++++ | 44% ~15s
|+++++++++++++++++++++++++ | 50% ~14s
|++++++++++++++++++++++++++++ | 56% ~12s
|+++++++++++++++++++++++++++++++ | 61% ~11s
|++++++++++++++++++++++++++++++++++ | 67% ~09s
|+++++++++++++++++++++++++++++++++++++ | 72% ~08s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~06s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~05s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=28s
Calculating cluster Fib:Alv/Inflammed
| | 0 % ~calculating
|++++++++ | 14% ~10s
|+++++++++++++++ | 29% ~08s
|++++++++++++++++++++++ | 43% ~07s
|+++++++++++++++++++++++++++++ | 57% ~05s
|++++++++++++++++++++++++++++++++++++ | 71% ~03s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=11s
Calculating cluster DATP:Fibrotic
| | 0 % ~calculating
|+++ | 6 % ~24s
|++++++ | 12% ~22s
|+++++++++ | 18% ~20s
|++++++++++++ | 24% ~19s
|+++++++++++++++ | 29% ~17s
|++++++++++++++++++ | 35% ~16s
|+++++++++++++++++++++ | 41% ~16s
|++++++++++++++++++++++++ | 47% ~14s
|+++++++++++++++++++++++++++ | 53% ~12s
|++++++++++++++++++++++++++++++ | 59% ~11s
|+++++++++++++++++++++++++++++++++ | 65% ~09s
|++++++++++++++++++++++++++++++++++++ | 71% ~08s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~06s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~05s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=26s
Calculating cluster Airway-like
| | 0 % ~calculating
|++++ | 8 % ~17s
|++++++++ | 15% ~15s
|++++++++++++ | 23% ~14s
|++++++++++++++++ | 31% ~13s
|++++++++++++++++++++ | 38% ~11s
|++++++++++++++++++++++++ | 46% ~11s
|+++++++++++++++++++++++++++ | 54% ~10s
|+++++++++++++++++++++++++++++++ | 62% ~08s
|+++++++++++++++++++++++++++++++++++ | 69% ~06s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~05s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~03s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=21s
Calculating cluster Fib:NOS
| | 0 % ~calculating
|+++++++++++++++++++++++++ | 50% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
all.markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(ipf1.filtered.xen, features = top10$gene) + NoLegend()
save(ipf1.filtered.xen, file='ipf1.filtered.xen.RObj')
# default identity, the cells that were excluded based on low counts.
ipf1.xen$named_clusters <- 'Low Count'
# Convert assignments in subset to vector (not factor)
clusterIDs <- as.character(ipf1.filtered.xen$named_clusters)
names(clusterIDs) <- names(ipf1.filtered.xen$named_clusters)
# replace elements of the superset based on name in the subset
ipf1.xen$named_clusters[names(clusterIDs)] <- clusterIDs
# convert superset back to factor
ipf1.xen$named_clusters <- as.factor(ipf1.xen$named_clusters)
Break down the assignments:
ipf1.xen@meta.data %>% group_by(named_clusters) %>% summarise(no_rows=length(named_clusters))
Update saved object
save(ipf1.xen, file='ipf1.xen.RObj')
For version 1.2.0, the instructons say:
“Visualize custom cells groups in Xenium Explorer by importing a CSV file specifying cell ids and corresponding group names. Build a CSV file containing each cell id in the first column and the group the cell should be assigned to in the second column. A cell may only be assigned to one group. Not all cells in the dataset need to be included.
Ensure the columns have headers named “cell_id” and “group”.”
This is basically the named_clusters list with a header and saved as a CSV.
scratch <- tibble(cell_id = names(ipf1.xen$named_clusters),
group = ipf1.xen$named_clusters)
write.csv(scratch, 'Seurat_named_clusters.csv')
Found a region within Xenium explorer and figured the coordinates.
cropped.coords <- Crop(ipf1.xen[["fov"]], y = c(2150, 2750), x = c(5650, 6350), coords = "plot")
ipf1.xen[["fibroblastic1"]] <- cropped.coords
Warning: Key ‘Xenium_’ taken, using ‘fibroblastic1_’ instead
# visualize cropped area with cell segmentations & selected molecules
DefaultBoundary(ipf1.xen[["fibroblastic1"]]) <- "segmentation"
Warning: Adding image with unordered cells
ImageDimPlot(ipf1.xen, fov='fibroblastic1',group.by = 'named_clusters', axes = TRUE)
ImageDimPlot(ipf1.xen, fov = "fibroblastic1", axes = TRUE, border.color = "white", border.size = 0.1, cols = "polychrome",
coord.fixed = FALSE, molecules = c("ITGB6", "GDF15", "CTHRC1", "ABCA3", "CLDN4"), nmols = 10000)
ImageFeaturePlot(ipf1.xen, fov='fibroblastic1',features=c("ITGB6"), axes = TRUE)
Warning: Could not find ITGB6 in the default search locations, found in ‘Xenium’ assay instead
ImageFeaturePlot(ipf1.xen, fov='fibroblastic1',features=c("GDF15"), axes = TRUE)
Warning: Could not find GDF15 in the default search locations, found in ‘Xenium’ assay instead
ImageFeaturePlot(ipf1.xen, fov='fibroblastic1',features=c("CTHRC1"), axes = TRUE)
Warning: Could not find CTHRC1 in the default search locations, found in ‘Xenium’ assay instead
ipf1.xen[["alveolar1"]] <- cropped.coords
Warning: Key ‘Xenium_’ taken, using ‘alveolar1_’ instead
# visualize cropped area with cell segmentations & selected molecules
DefaultBoundary(ipf1.xen[["alveolar1"]]) <- "segmentation"
Warning: Adding image with unordered cells
ImageDimPlot(ipf1.xen, fov='alveolar1',group.by = 'named_clusters', axes = TRUE)
ImageDimPlot(ipf1.xen, fov = "alveolar1", axes = TRUE, border.color = "white", border.size = 0.1, cols = "polychrome",
coord.fixed = FALSE, molecules = c("ITGB6", "GDF15", "CTHRC1", "ABCA3", "CLDN4"), nmols = 10000)
ImageFeaturePlot(ipf1.xen, fov='alveolar1',features=c("ITGB6"), axes = TRUE)
ImageFeaturePlot(ipf1.xen, fov='alveolar1',features=c("GDF15"), axes = TRUE)
ImageFeaturePlot(ipf1.xen, fov='alveolar1',features=c("CTHRC1"), axes = TRUE)
Supposedly able to learn what cell groupings might be occurring.
ipf1.xen <- BuildNicheAssay(object = ipf1.xen, fov = "fov", group.by = "named_clusters",
niches.k = 10, neighbors.k = 15)
Computing nearest neighbor graph
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 38443772 2053.2 122162082 6524.2 122162082 6524.2
Vcells 1571450967 11989.3 3184002539 24292.1 3183855140 24290.9
Update saved object
save(ipf1.xen, file='ipf1.xen.RObj')
table(ipf1.xen$named_clusters, ipf1.xen$niches)
1 2 3 4 5 6 7 8 9 10
Airway-like 55 2871 16 191 360 185 206 191 160 712
DATP:Fibrotic 438 264 51 872 951 610 363 13192 264 4993
DATP:NOS 1778 990 225 2884 3790 2081 1344 7515 1406 22795
Fib:Alv/Inflammed 312 24 126 12443 3459 3344 4240 977 2693 2236
Fib:Alveolar 82 53 268 7750 5466 7870 20659 1402 6318 2414
Fib:Fibrotic 57 39 957 6002 8288 5110 6441 764 18474 2360
Fib:NOS 0 0 236 0 0 0 0 0 0 0
Low Count 2139 1224 1316 23571 129768 22615 14441 5972 18759 22724
Macrophage? 18182 96 38 2081 1792 1632 1000 1887 594 4737
Unidentified 182 83 131 7087 6876 24726 6714 1910 3872 3902
ImageDimPlot(ipf1.xen,fov='fibroblastic1' , group.by = "niches", size = 1.5)
ImageDimPlot(ipf1.xen,fov='alveolar1' , group.by = "niches", size = 1.5)
Niche analysis seems to find mostly homotypic niches, i.e., THE EPITHELIUM or THE INTERSTITIUM. Not sure how to make it find heterotypic regions or ask the simple question of where a niche tends to be relative to another.
Many questions which are of the form, Given a specific cell (or cell type), what cells are within a specified radius and what kinds of cells are they.
In general this seems like a question for a kd-tree which is of fortunately low dimensionality (x and y) and there are off the shelf packages for dealing with it, mostly overkill because it is intended for high-dimensional data, but at least presumably fast.
# Some kludging here to generate a matrix with rownames = cell names
centroids <- ipf1.xen@images$fov$centroids@coords
rownames(centroids) <- ipf1.xen@images$fov$centroids@cells # pull these directly from the centroids object
head(centroids)
x y
aaaablkg-1 838.8134 7267.000
aaaabmhb-1 821.4545 7273.264
aaaacaoa-1 843.7526 7274.842
aaaacega-1 840.4955 7284.887
aaaachfb-1 830.5627 7287.562
aaaadhfk-1 847.2108 7296.594
# Force cell name order to be the same as in the Seurat object (probably is, but won't hurt)
centroids <- centroids[Cells(ipf1.xen),]
head(centroids)
x y
aaaablkg-1 838.8134 7267.000
aaaabmhb-1 821.4545 7273.264
aaaacaoa-1 843.7526 7274.842
aaaacega-1 840.4955 7284.887
aaaachfb-1 830.5627 7287.562
aaaadhfk-1 847.2108 7296.594
DivingIntoGeneticsAndGenomics points out that dbscan is a good package for doing the kd-tree.
library(dbscan)
radius=100 # same units as the coordinates, typically microns)
nn <- frNN(x=centroids,eps=radius)
head(nn$id, n=2)
$`aaaablkg-1`
[1] 3 198 196 213644 197 4 2 213748 195 5 194 193 213643 213649 213745 192
[17] 39758 213741 39764 39776 213749 6 39779 213744 213654 39774 213740 191 7 190 39766 213653
[33] 39918 213648 189 8 39782 213739 188 213647 39772 213747 213650 39778 39914 213661 39763 213659
[49] 213734 213731 213732 39767 39913 39773 10 39783 213742 11 39916 39790 187 39785 9 39768
[65] 39760 213668 213660 39780 213733 185 14 39910 39781 20853 186 213726 39793 213656 39771 213646
[81] 12 213729 13 213652 213743 39917 213657 213670 39759 39909 184 213677 39791 39912 213675 213666
[97] 39765 39787 39697 213746 183 15 39899 39698 17 213730 16 39788 213737 213725 213679 213665
[113] 213673 20854 214069 213645 39904 39699 39911 213738 39775 182 39895 18 39757 213663 19 39769
[129] 213651 39789 39794 214070 213533 39761 39894 39705 213728 213674 39784 214005 39702 21 39908 39892
[145] 213530 213736 39897 213658 214065 20 213538 39709 23 181 39694 39701 39893 22 39710 213676
[161] 214006 39902 39696 213655 213727 213671 213667 180 213996 214066 213539 39704 214032 39903 213664 214007
[177] 213735 213997 213992 39890 179 39713 39919 214067 213672 24 213531 213537 39907 39770 39880 39762
[193] 39792 178 213568 213662 39703 214033 26 214061 214059 39877 39712 39719 39891 25 213564 213548
[209] 213534 213678 27 39720 39889 214013 213669 214068 213546 39900 214063 39777 39714 39707 213536 39716
[225] 213991 39883 213987 214021 39906 213532 213543 176 175 39724 213550 213529 214002 39876 214034 177
[241] 213982 214057 39708 213565 213566 39888 213552 214014 214035 214054 213542 214060 214020 39706 214026 39786
[257] 213544 214062 28 39717 39722 20855 39898 213535 214003 213570 213980 39715 29 213540 39881 39729
[273] 214055 39726 214017 213553 39721 214023 39915 39829 39718 213988 214029 214064 39885 39733 39905 214052
[289] 214004
$`aaaabmhb-1`
[1] 39776 39774 198 39764 5 39779 1 39766 39772 39758 39778 4 3 195 39782 39773
[17] 8 39767 196 39783 39763 7 39768 39780 39918 194 39785 213644 39771 39781 197 39790
[33] 6 193 39760 191 213649 9 213748 39914 39787 39759 213654 39793 11 39916 192 39765
[49] 39791 213643 10 39788 213741 12 213653 189 39917 213745 39913 39697 39775 213661 190 39789
[65] 39769 39910 39698 13 188 213749 14 213744 213740 39794 213659 39784 213648 39912 39699 15
[81] 39909 39761 213668 39757 213650 186 17 213647 39911 185 39702 39694 39705 213731 213739 187
[97] 213677 213660 16 213732 18 213747 39701 213734 39696 213670 39904 213675 39899 39709 39908 20854
[113] 19 213679 39770 20853 213656 39792 213742 183 39704 213657 213666 39710 184 39895 39762 213726
[129] 20 39919 213673 213652 213733 39703 213533 39713 39897 213646 21 23 39777 213729 39712 39902
[145] 39894 39907 39903 213665 213743 39707 22 39892 39893 39719 182 39714 214069 213538 39720 39786
[161] 213746 39708 25 39716 213725 213539 39890 213674 39706 213730 213530 213663 26 213737 181 24
[177] 39906 39900 39891 39724 213645 39717 213651 39715 39722 39889 214070 27 39880 213738 39721 213548
[193] 180 39718 213676 39695 39915 213658 213728 214065 39729 39726 39700 213568 213671 39877 39898 39883
[209] 214005 213537 213546 28 213564 178 39888 213667 39728 213531 39725 39905 213736 179 213655 213550
[225] 39733 213672 213664 39876 213552 39730 214066 214006 20855 29 213534 39881 39885 213727 39711 176
[241] 213996 39723 214032 39896 213536 213566 214059 39735 213662 213678 213543 39734 214067 214007 39884 213565
[257] 214061 213553 213997 214033 213735 213992 177 213669 39732 213532 175 39886 213544 213529 39727 213555
[273] 213542 39736
# Verify cells are still in the same order
all.equal(colnames(ipf1.xen), names(nn$id))
[1] TRUE
# Convert the frNN object into a dataframe
nn_df <- stack(nn$id)
nn_df$dist <- stack(nn$dist)$values
head(nn_df)
# values are the indices in names(xenium object) so get target cells' names and annotations
nn_df$neighbor <- Cells(ipf1.xen)[nn_df$values]
# Cluster ID
cluster_ids<- ipf1.xen$named_clusters %>% unname()
nn_df$ind_cluster <- cluster_ids[nn_df$ind]
nn_df$neighbor_cluster <- cluster_ids[nn_df$values]
# Niche ID
niche_ids<- ipf1.xen$niches %>% unname()
Error in `x[[i, drop = TRUE]]`:
! ‘niches’ not found in this Seurat object
Backtrace:
1. ipf1.xen$niches %>% unname()
4. SeuratObject:::`$.Seurat`(ipf1.xen, niches)
6. SeuratObject:::`[[.Seurat`(x, i, drop = TRUE)
I think this can all be done with tidyverse.
#graphs
ggplot(DATP.fib_df, aes(x=ind_cluster, y=n)) + geom_boxplot()
ggplot(DATP.fib_df, aes(x=ind_cluster, y=min_dist)) + geom_boxplot()
ggplot(DATP.fib_df, aes(x=ind_cluster, y=mean_dist)) + geom_boxplot()
nn_df %>% filter(ind_cluster == 'DATP:NOS') %>%
filter(neighbor_cluster == 'DATP:Fibrotic') %>%
group_by(ind) %>%
summarise(ind_cluster=first(ind_cluster), # all ind_cluster should be the same for each ind
mean_dist = mean(dist),
n=n(),
min_dist = min(dist)) -> DATP.DATP_df
# quick summary statistics
DATP.DATP_df %>%
group_by(ind_cluster) %>%
summarise(mean_dist = mean(mean_dist), mean_n=mean(n), median_n=median(n), mean_mindist=mean(min_dist))
Identify cells that are general DATPs that are preferentially in a context of other general DATPs. Let us say for instance that within a radius of 50 microns, there are going to be around 25 cells on average, and only one or two should be fibrotic DATPs.
nn_df %>% filter(ind_cluster == 'DATP:NOS') %>%
filter(neighbor_cluster == 'DATP:Fibrotic') %>%
filter(dist<50) %>%
group_by(ind) %>%
summarise(ind_cluster=first(ind_cluster), # all ind_cluster should be the same for each ind
n=n(),
min_dist = min(dist)) %>%
filter(n<3) -> regen.DATP_df
head(regen.DATP_df)
By contrast identify fibrotic DATPs that are in the context of a lot of fibrotic DATPs, say, greater than 15 in their 50 um radius.
nn_df %>% filter(ind_cluster == 'DATP:Fibrotic') %>%
filter(neighbor_cluster == 'DATP:Fibrotic') %>%
filter(dist<50) %>%
group_by(ind) %>%
summarise(ind_cluster=first(ind_cluster), # all ind_cluster should be the same for each ind
n=n(),
min_dist = min(dist)) %>%
filter(n>15) -> fibrotic.DATP_df
head(fibrotic.DATP_df)
Now repeat fibrotic fibroblast analysis based on this stringent identification of DATPs that are situated in clusters.
nn_df %>% filter(ind %in% regen.DATP_df$ind | ind %in% fibrotic.DATP_df$ind) %>%
filter(neighbor_cluster == 'Fib:Fibrotic') %>%
filter(dist<50) %>%
group_by(ind) %>%
summarise(ind_cluster=first(ind_cluster), # all ind_cluster should be the same for each ind
mean_dist = mean(dist),
n=n(),
min_dist = min(dist)) -> strictDATP.fib_df
# quick summary statistics
strictDATP.fib_df %>%
group_by(ind_cluster) %>%
summarise(mean_dist = mean(mean_dist), mean_n=mean(n), median_n=median(n), mean_mindist=mean(min_dist))
#graphs
ggplot(strictDATP.fib_df, aes(x=ind_cluster, y=n)) + geom_boxplot()
ggplot(strictDATP.fib_df, aes(x=ind_cluster, y=min_dist)) + geom_boxplot()
ggplot(strictDATP.fib_df, aes(x=ind_cluster, y=mean_dist)) + geom_boxplot()
Try gene expression based on fibroblasts within 50 um of these identities.
neighbor.Fib.xen <- merge(regen.Fib.xen,y=fibDATP.Fib.xen)
Warning: Key ‘Xenium_’ taken, using ‘fov2_’ instead
VlnPlot(neighbor.Fib.xen, c('COL1A1','CTHRC1','FN1'), group.by = 'neighbor')
Warning: The following requested variables were not found: FN1
Maybe the issue is that we don’t have enough markers here to distinguish fibrotic DATPs from regular DATPs, and they are intermixed anyway.
# Nonparametric statistical testing
wilcox.test(filter(DATP.fib_df, ind_cluster=='DATP:Fibrotic')$min_dist,
filter(DATP.fib_df, ind_cluster=='Airway-like')$min_dist,
alternative='two.sided')
Wilcoxon rank sum test with continuity correction
data: filter(DATP.fib_df, ind_cluster == "DATP:Fibrotic")$min_dist and filter(DATP.fib_df, ind_cluster == "Airway-like")$min_dist
W = 41072937, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
wilcox.test(filter(DATP.fib_df, ind_cluster=='DATP:Fibrotic')$n,
filter(DATP.fib_df, ind_cluster=='Airway-like')$n,
alternative='two.sided')
Wilcoxon rank sum test with continuity correction
data: filter(DATP.fib_df, ind_cluster == "DATP:Fibrotic")$n and filter(DATP.fib_df, ind_cluster == "Airway-like")$n
W = 72022109, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
ggplot(DATP.fib_df, aes(x=ind_cluster, y=n)) +
geom_boxplot(outlier.shape = NA) +
theme_classic() +
ylim(0,50)
Warning: Removed 1128 rows containing non-finite values (`stat_boxplot()`).
ggplot(DATP.fib_df, aes(x=ind_cluster, y=min_dist)) +
geom_boxplot(outlier.shape = NA) +
theme_classic()
ggplot(DATP.fib_df, aes(x=ind_cluster, y=mean_dist)) + geom_boxplot(outlier.shape = NA) + theme_classic()
Builds off the cell radius tables from above.
First, ask analogous questions except based on niche.
nn_df %>% filter(ind_cluster == 'Airway-like') %>%
filter(str_detect(neighbor_cluster, 'Fib:')) %>%
filter(dist<50) -> regen.Fib_df
nn_df %>% filter(ind_cluster == 'DATP:Fibrotic') %>%
filter(str_detect(neighbor_cluster, 'Fib:')) %>%
filter(dist<50) -> fibDATP.Fib_df
# Fibs near regenerative zones may not also be part of a fibrotic zone and vice versa
RegenDATP.fib.cells <- setdiff(unique(regen.Fib_df$neighbor),unique(fibDATP.Fib_df$neighbor))
FibDATP.fib.cells <- setdiff(unique(fibDATP.Fib_df$neighbor),unique(regen.Fib_df$neighbor))
# make seurat subsets based on these identities
fibDATP.Fib.xen <- subset(ipf1.xen, cells=FibDATP.fib.cells)
Warning: Not validating FOV objects
Warning: Not validating Centroids objects
Warning: Not validating Centroids objects
Warning: Not validating FOV objects
Warning: Not validating Centroids objects
Warning: Not validating FOV objects
Warning: Not validating FOV objects
Warning: Not validating FOV objects
Warning: Not validating Seurat objects
fibDATP.Fib.xen$neighbor <- 'FibDATP'
regen.Fib.xen <- subset(ipf1.xen, cells=RegenDATP.fib.cells)
Warning: Not validating FOV objects
Warning: Not validating Centroids objects
Warning: Not validating Centroids objects
Warning: Not validating FOV objects
Warning: Not validating Centroids objects
Warning: Not validating FOV objects
Warning: Not validating FOV objects
Warning: Not validating FOV objects
Warning: Not validating Seurat objects
regen.Fib.xen$neighbor <- 'RegenDATP'
neighbor.Fib.xen <- merge(regen.Fib.xen,y=fibDATP.Fib.xen)
Warning: Key ‘Xenium_’ taken, using ‘fov2_’ instead